# Load the tidied UFO sightings table; readr guesses the column types.
ufo_data <- read_csv("./data/tidied_data_final.csv")
## Parsed with column specification:
## cols(
## latitude = col_double(),
## longitude = col_double(),
## city_description = col_character(),
## ufo_shape = col_character(),
## encounter_length = col_double(),
## described_encounter_length = col_character(),
## description = col_character(),
## date_documented = col_character(),
## country = col_character(),
## state = col_character(),
## city = col_character(),
## X12 = col_logical(),
## date = col_character(),
## time = col_time(format = "")
## )
#%>% select(-X12)
# Split the "/"-delimited date string into month, day and year columns.
# separate() leaves the new columns as character vectors.
ufo <- separate(
  ufo_data,
  col = date,
  into = c("month", "day", "year"),
  sep = "/"
)
## A bargraph showing the total number of UFO observed among 7 countries.
# Share of all sightings contributed by each country, as a labelled bar chart.
ufo %>%
  count(country) %>%
  mutate(
    percentage = n / sum(n),
    # Order the bars from the fewest to the most sightings.
    country = forcats::fct_reorder(factor(country), n)
  ) %>%
  ggplot(aes(
    x = country,
    y = percentage,
    fill = country,
    label = scales::percent(percentage)
  )) +
  geom_col() +
  # Print each bar's share just above the bar.
  geom_text(vjust = -0.5, size = 3) +
  # Show the axis in the same percent units as the bar labels.
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Bar Graph of the Percentage of UFOs Observed by Country",
    x = "Country",
    y = "Percentage"
  )
Comment: According to the graph above, most of the data come from the United States, and some come from Canada. Australia and the United Kingdom contributed very little data.
## line plot showing the number of UFO observed across time among countries
# Sightings per year, drawn as one coloured line per country.
ufo %>%
  count(year, country) %>%
  # year is stored as text; make it numeric so the x axis is continuous.
  mutate(year = as.numeric(year)) %>%
  ggplot(aes(x = year, y = n, group = country, color = country)) +
  geom_line() +
  labs(
    title = " Line Plot of Number of UFO Observed Across Time Among different Countries ",
    x = "Year",
    y = "Number Of UFO Observed"
  ) +
  theme(axis.text.x = element_text(angle = 90))
Comment: In this plot, we can see that for the United States the earliest data date from before 1925, and sometime after 1960 the number of UFO sightings begins to increase. We can also see that most UFO sighting reports took place around 1990 and after. Canada shows a similar trend, but with far fewer sightings than the United States.
### Boxplot showing the distribution of the UFO Shape Observed in US
# Distribution, across states, of how often each UFO shape was reported in
# the USA from 1950 onwards.
ufo %>%
  # `year` is a character column, so convert it before comparing; otherwise
  # the cutoff is evaluated as a string comparison against "1950".
  filter(country == "USA", as.numeric(year) >= 1950) %>%
  # One row per state/shape pair; n is the number of sightings.
  count(state, ufo_shape) %>%
  # Order the shapes along the x axis by their median per-state count
  # (median is fct_reorder's default summary function).
  mutate(ufo_shape = forcats::fct_reorder(factor(ufo_shape), n)) %>%
  ggplot(aes(x = ufo_shape, y = n, group = ufo_shape)) +
  geom_boxplot(aes(color = ufo_shape)) +
  theme(axis.text.x = element_text(angle = 300)) +
  labs(
    title = "Boxplot showing distribution of the UFO Shape Observed in US ",
    x = "UFO Shape",
    y = "Number"
  )
Comment: According to this plot, the light UFO shape was sighted most often and the changed shape least often, judging by their median values. Some of the most commonly sighted UFO shapes are light, triangle, circle, unknown, other, fireball, disk, and sphere. Some of the least commonly sighted UFO shapes are changed, crescent, delta, flare, hexagon, pyramid, and round.
## USA
# Heat map of the mean encounter length for each month/year cell in the USA
# from 1950 onwards.
ufo %>%
  # year and month are character columns; convert them first so the year
  # cutoff below is a numeric comparison rather than a string comparison.
  mutate(
    year = as.numeric(year),
    month = as.numeric(month)
  ) %>%
  # Keep US sightings since 1950 whose encounter_length is below 6000.
  filter(country == "USA", year >= 1950, encounter_length < 6000) %>%
  # Collapse to a single row per year/month so each tile is drawn once.
  group_by(year, month) %>%
  # NOTE(review): the division by 60 presumably converts seconds to
  # minutes -- confirm the units of encounter_length.
  summarise(ave_encountered_length = mean(encounter_length / 60)) %>%
  ungroup() %>%
  # Use an ordered factor so the y axis follows the calendar instead of
  # alphabetical order; reversed levels put January at the top.
  mutate(month = factor(month.name[month], levels = rev(month.name))) %>%
  ggplot(aes(x = year, y = month)) +
  geom_tile(aes(fill = ave_encountered_length), colour = "white") +
  scale_fill_gradient(low = "lightyellow", high = "darkblue") +
  scale_x_continuous(
    breaks = seq(1950, 2015, by = 5),
    limits = c(1950, 2015)
  ) +
  labs(
    title = "Average UFO Encounter Length Compared among Months Across Time in USA ",
    x = "Years",
    y = "Month"
  ) +
  theme(
    axis.text.x = element_text(angle = 300),
    plot.subtitle = element_text(hjust = 0.5)
  )

#### mapping
# Number of sightings recorded for each US state.
US <- ufo %>%
  filter(country == "USA") %>%
  count(state, name = "Num.UFO")

# Read the US state boundary shapefile.
states <- readOGR("data/cb_2018_us_state_500k.shp")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/SigL/Documents/academic/Fall2019/DataScience/final_project/data/cb_2018_us_state_500k.shp", layer: "cb_2018_us_state_500k"
## with 56 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
# Check which state codes in the US summary also appear in the shapefile.
US$state %in% states$STUSPS
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [49] TRUE TRUE FALSE TRUE TRUE TRUE
# Check which shapefile state codes appear in the US summary.
states$STUSPS %in% US$state
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Shapefile entries 14, 38, 39, 45 and 46 have no matching row in US, so
# build new_states with only the polygons whose code is present in US.
new_states <- subset(states, states$STUSPS %in% US$state)
# Confirm that every remaining polygon has a match in US.
new_states$STUSPS %in% US$state
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE
# Check which US summary rows have a polygon in new_states.
US$state %in% new_states$STUSPS
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [49] TRUE TRUE FALSE TRUE TRUE TRUE
# Sort the US rows into the same order as the polygons in new_states; rows
# whose state has no polygon sort to the end.
US <- US %>%
  arrange(match(state, new_states$STUSPS))
# set for the range based on number being observed--discrete
# bins = c(0,50,100,150,200,250,300,350,400,450,500,550)
# pal = colorBin("YlOrRdBu",domain = US$Num.observed,bins = bins)
# Choropleth of the number of sightings per US state.
#
# Look the counts up by state code so that every polygon receives the value
# for its own state. US has rows whose code is absent from the shapefile and
# the full shapefile has polygons with no row in US, so pairing the two by
# position would colour and label the wrong states.
state_counts <- US$Num.UFO[match(new_states$STUSPS, US$state)]

# Continuous red palette spanning the counts that are actually drawn.
pal <- colorNumeric(palette = "Reds", domain = state_counts)

# One HTML hover label per polygon, in polygon order.
labels <- paste0(
  "<p>", "Number of UFO Observed Across Time :", state_counts, "</p>"
)

leaflet() %>%
  setView(-96, 37.8, 4) %>%
  addTiles() %>%
  addPolygons(
    # Draw only the polygons that have a sighting count.
    data = new_states,
    weight = 1,
    smoothFactor = 0.2,
    color = "white",
    fillOpacity = 0.75,
    fillColor = pal(state_counts),
    label = lapply(labels, HTML)
  ) %>%
  addLegend(
    pal = pal,
    values = state_counts,
    opacity = 0.7,
    position = "topright"
  )
Copyright © 2019 UFO Analysis Group All rights reserved.